{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>列0</th>\n",
       "      <th>列1</th>\n",
       "      <th>列2</th>\n",
       "      <th>列3</th>\n",
       "      <th>列4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>行0</th>\n",
       "      <td>-1.670463</td>\n",
       "      <td>-0.054807</td>\n",
       "      <td>0.766677</td>\n",
       "      <td>1.664175</td>\n",
       "      <td>-0.140406</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>行1</th>\n",
       "      <td>0.763584</td>\n",
       "      <td>-1.226076</td>\n",
       "      <td>-0.111150</td>\n",
       "      <td>1.336746</td>\n",
       "      <td>-2.309379</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>行2</th>\n",
       "      <td>-0.626412</td>\n",
       "      <td>0.347270</td>\n",
       "      <td>0.667558</td>\n",
       "      <td>-0.300503</td>\n",
       "      <td>0.561218</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          列0        列1        列2        列3        列4\n",
       "行0 -1.670463 -0.054807  0.766677  1.664175 -0.140406\n",
       "行1  0.763584 -1.226076 -0.111150  1.336746 -2.309379\n",
       "行2 -0.626412  0.347270  0.667558 -0.300503  0.561218"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "d1 = np.random.normal(loc=0, scale=1, size=(3,5)) # draw from a normal distribution with mean (loc) 0 and standard deviation (scale) 1; produces a 3-row, 5-column array\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "row_idx = ['行{}'.format(i) for i in range(3)]\n",
    "col_idx = ['列{}'.format(i) for i in range(5)]\n",
    "\n",
    "df = pd.DataFrame(d1, index=row_idx, columns=col_idx)  # 产生一个表格, 分别设置行索引和列索引\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(3, 5)\n",
      "Index(['行0', '行1', '行2'], dtype='object')\n",
      "Index(['列0', '列1', '列2', '列3', '列4'], dtype='object')\n",
      "[[-1.67046261 -0.05480712  0.76667703  1.66417538 -0.14040584]\n",
      " [ 0.76358371 -1.22607582 -0.11115004  1.33674602 -2.30937901]\n",
      " [-0.62641235  0.3472698   0.66755755 -0.30050309  0.56121795]]\n"
     ]
    }
   ],
   "source": [
    "# DataFrame的属性\n",
    "print(df.shape)\n",
    "print(df.index)\n",
    "print(df.columns)\n",
    "print(df.values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>列0</th>\n",
       "      <th>列1</th>\n",
       "      <th>列2</th>\n",
       "      <th>列3</th>\n",
       "      <th>列4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>行0</th>\n",
       "      <td>-1.670463</td>\n",
       "      <td>-0.054807</td>\n",
       "      <td>0.766677</td>\n",
       "      <td>1.664175</td>\n",
       "      <td>-0.140406</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>行1</th>\n",
       "      <td>0.763584</td>\n",
       "      <td>-1.226076</td>\n",
       "      <td>-0.111150</td>\n",
       "      <td>1.336746</td>\n",
       "      <td>-2.309379</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          列0        列1        列2        列3        列4\n",
       "行0 -1.670463 -0.054807  0.766677  1.664175 -0.140406\n",
       "行1  0.763584 -1.226076 -0.111150  1.336746 -2.309379"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(2) # 默认前5行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>列0</th>\n",
       "      <th>列1</th>\n",
       "      <th>列2</th>\n",
       "      <th>列3</th>\n",
       "      <th>列4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>行2</th>\n",
       "      <td>-0.626412</td>\n",
       "      <td>0.34727</td>\n",
       "      <td>0.667558</td>\n",
       "      <td>-0.300503</td>\n",
       "      <td>0.561218</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          列0       列1        列2        列3        列4\n",
       "行2 -0.626412  0.34727  0.667558 -0.300503  0.561218"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.tail(1) # 默认后5行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "      <th>day</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2010</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2013</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2015</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2017</td>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   year  month  day\n",
       "0  2010      2    1\n",
       "1  2013      4    3\n",
       "2  2015      6    5\n",
       "3  2017      8    7"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# df.reset_index([drop=True/False]) 重置索引\n",
    "# 索引修改无法通过下标修改，只能全部修改 df.index = index_arr\n",
    "# 字典形式的dataframe\n",
    "cal = pd.DataFrame({'year':[2010,2013,2015,2017], 'month':[2,4,6,8], 'day':[1,3,5,7]})\n",
    "cal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(4, 4, 4)"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mdx = pd.MultiIndex.from_frame(cal)\n",
    "mdx.nlevels\n",
    "mdx.names\n",
    "mdx.levshape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>day</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2010</th>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2013</th>\n",
       "      <th>4</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015</th>\n",
       "      <th>6</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2017</th>\n",
       "      <th>8</th>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            day\n",
       "year month     \n",
       "2010 2        1\n",
       "2013 4        3\n",
       "2015 6        5\n",
       "2017 8        7"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# cal.set_index('year') # 设置索引\n",
    "cal.set_index(['year','month'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RangeIndex(start=0, stop=4, step=1)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# dir(cal.index)\n",
    "cal.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'numpy.ndarray'> 2\n",
      "<class 'list'> 2\n",
      "<class 'list'>\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "MultiIndex([(1,  'red'),\n",
       "            (1, 'blue'),\n",
       "            (2,  'red'),\n",
       "            (2, 'blue')],\n",
       "           names=['a', 'b'])"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# panel 三维数据容器， 0.20.x已废弃, 使用MultiIndex代替\n",
    "# np.arange(8) yields the integers 0-7, which reshape(2,2,2) arranges into a 3-D array of shape (2, 2, 2).\n",
    "ar3d = np.arange(8).reshape(2,2,2) # ndarray\n",
    "print(type(ar3d), len(ar3d))\n",
    "print(type(ar3d.tolist()), len(ar3d.tolist())) # tolist() converts the ndarray into a nested Python list\n",
    "arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]\n",
    "print(type(arrays))\n",
    "mdx = pd.MultiIndex.from_arrays(arrays, names=('a','b'))\n",
    "mdx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "col:0    0\n",
       "col:1    1\n",
       "col:2    2\n",
       "col:3    3\n",
       "col:4    4\n",
       "Name: index:0, dtype: int32"
      ]
     },
     "execution_count": 118,
     "metadata": {},
     "output_type": "execute_result"
    }
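   ],
   "source": [
    "# Series: a one-dimensional labeled array; selecting a single DataFrame row returns a Series indexed by the column labels\n",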
    "df = pd.DataFrame(np.arange(15).reshape(3,5), index=['index:{}'.format(i) for i in range(3)], columns=['col:{}'.format(i) for i in range(5)])\n",
    "s = df.iloc[0, :]  # 取第0行所有列，返回一个series对象\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['col:0', 'col:1', 'col:2', 'col:3', 'col:4'], dtype='object')"
      ]
     },
     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 1, 2, 3, 4])"
      ]
     },
     "execution_count": 120,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "row:0    3\n",
       "row:1    4\n",
       "row:2    5\n",
       "row:3    6\n",
       "row:4    7\n",
       "row:5    8\n",
       "dtype: int32"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.Series(np.arange(3,9)) # 不指定索引名称，索引默认为0-x\n",
    "pd.Series(np.arange(3,9), index=['row:{}'.format(i) for i in range(6)]) #指定索引名称"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 180,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2010101</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2010102</td>\n",
       "      <td>15</td>\n",
       "      <td>21</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010103</td>\n",
       "      <td>18</td>\n",
       "      <td>20</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      date  low  high  avg\n",
       "0  2010101   12    23   14\n",
       "1  2010102   15    21   16\n",
       "2  2010103   18    20   19"
      ]
     },
     "execution_count": 180,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read a CSV data file (CSV: comma-separated values)\n",
    "data = pd.read_csv('demo.csv')\n",
    "# data = pd.read_csv('demo.csv', header=0, index_col=0) # row 0 is the header, column 0 is the index column\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2010102</td>\n",
       "      <td>15</td>\n",
       "      <td>21</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010103</td>\n",
       "      <td>18</td>\n",
       "      <td>20</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      date  low  high  avg\n",
       "1  2010102   15    21   16\n",
       "2  2010103   18    20   19"
      ]
     },
     "execution_count": 175,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# data.drop(['avg'], axis=1) # 删除不想显示的列\n",
    "data.drop([0], axis=0) # 删除不想显示的行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2010102"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# DataFrame 索引操作\n",
    "type(data) # pandas.core.frame.DataFrame\n",
    "data['date'][1] # column first, then row: select the 'date' column, then the entry with index label 1\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2010102"
      ]
     },
     "execution_count": 134,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.loc[1]['date'] # loc名字索引，先行后列\n",
    "data.loc[1, 'date'] # loc名字索引，先行后列，与上面的等效"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "15"
      ]
     },
     "execution_count": 135,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.iloc[1,1] # 根据位置索引 iloc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>low</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   low\n",
       "0   12\n",
       "1   15"
      ]
     },
     "execution_count": 141,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.iloc[:2, 1:2] # 获取前两行 第2列数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2010101</td>\n",
       "      <td>10</td>\n",
       "      <td>23</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2010102</td>\n",
       "      <td>10</td>\n",
       "      <td>21</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010103</td>\n",
       "      <td>10</td>\n",
       "      <td>20</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      date  low  high  avg\n",
       "0  2010101   10    23   14\n",
       "1  2010102   10    21   16\n",
       "2  2010103   10    20   19"
      ]
     },
     "execution_count": 142,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 赋值操作\n",
    "data['low'] = 10 # 整列赋值\n",
    "data.low = 10 # 与上一行相同\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2010101</td>\n",
       "      <td>10</td>\n",
       "      <td>23</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2010102</td>\n",
       "      <td>15</td>\n",
       "      <td>21</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010103</td>\n",
       "      <td>10</td>\n",
       "      <td>20</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      date  low  high  avg\n",
       "0  2010101   10    23   14\n",
       "1  2010102   15    21   16\n",
       "2  2010103   10    20   19"
      ]
     },
     "execution_count": 144,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.iloc[1,1] = 15 # 2行2列的值进行赋值\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010103</td>\n",
       "      <td>10</td>\n",
       "      <td>20</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2010101</td>\n",
       "      <td>10</td>\n",
       "      <td>23</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2010102</td>\n",
       "      <td>15</td>\n",
       "      <td>21</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      date  low  high  avg\n",
       "2  2010103   10    20   19\n",
       "0  2010101   10    23   14\n",
       "1  2010102   15    21   16"
      ]
     },
     "execution_count": 148,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sorting: sort_values and sort_index are also available on Series (Series.sort_values takes no by argument)\n",
    "data.sort_values(by='high', ascending=True) # 根据high递增\n",
    "data.sort_values(by=['low', 'high'], ascending=True) # 根据low, high递增"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010103</td>\n",
       "      <td>10</td>\n",
       "      <td>20</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2010102</td>\n",
       "      <td>15</td>\n",
       "      <td>21</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2010101</td>\n",
       "      <td>10</td>\n",
       "      <td>23</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      date  low  high  avg\n",
       "2  2010103   10    20   19\n",
       "1  2010102   15    21   16\n",
       "0  2010101   10    23   14"
      ]
     },
     "execution_count": 151,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.sort_index(ascending=False) # 根据索引排序"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    13\n",
       "1     6\n",
       "2    10\n",
       "dtype: int64"
      ]
     },
     "execution_count": 155,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# DataFrame Series的算术运算、逻辑运算、统计运算\n",
    "# The next three expressions are equivalent: each adds 2 to every value in the low column\n",
    "data.low + 2\n",
    "data['low'] + 2\n",
    "data['low'].add(2) \n",
    "# data中的两列对应相减 每行上的对应值相减\n",
    "data.high.sub(data.low)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    False\n",
       "1     True\n",
       "2    False\n",
       "Name: low, dtype: bool"
      ]
     },
     "execution_count": 156,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 逻辑运算\n",
    "data.low > 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2010102</td>\n",
       "      <td>15</td>\n",
       "      <td>21</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      date  low  high  avg\n",
       "1  2010102   15    21   16"
      ]
     },
     "execution_count": 157,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[data.low > 10] # 只显示low>10的数据 布尔索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 185,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010103</td>\n",
       "      <td>18</td>\n",
       "      <td>20</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      date  low  high  avg\n",
       "2  2010103   18    20   19"
      ]
     },
     "execution_count": 185,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[(data.low > 10) & (data.high < 21)] # 布尔索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 186,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010103</td>\n",
       "      <td>18</td>\n",
       "      <td>20</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      date  low  high  avg\n",
       "2  2010103   18    20   19"
      ]
     },
     "execution_count": 186,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.query('low > 10 & high < 21') # 查询函数 与上面的布尔索引效果相同，但是更简洁"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 188,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2010102</td>\n",
       "      <td>15</td>\n",
       "      <td>21</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      date  low  high  avg\n",
       "1  2010102   15    21   16"
      ]
     },
     "execution_count": 188,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[data.low.isin([15])] # isin函数，值是否出现在列表里面"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>2010102.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>21.333333</td>\n",
       "      <td>16.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.527525</td>\n",
       "      <td>2.516611</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>2010101.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>20.000000</td>\n",
       "      <td>14.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>2010101.5</td>\n",
       "      <td>13.5</td>\n",
       "      <td>20.500000</td>\n",
       "      <td>15.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>2010102.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>21.000000</td>\n",
       "      <td>16.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>2010102.5</td>\n",
       "      <td>16.5</td>\n",
       "      <td>22.000000</td>\n",
       "      <td>17.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>2010103.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>23.000000</td>\n",
       "      <td>19.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            date   low       high        avg\n",
       "count        3.0   3.0   3.000000   3.000000\n",
       "mean   2010102.0  15.0  21.333333  16.333333\n",
       "std          1.0   3.0   1.527525   2.516611\n",
       "min    2010101.0  12.0  20.000000  14.000000\n",
       "25%    2010101.5  13.5  20.500000  15.000000\n",
       "50%    2010102.0  15.0  21.000000  16.000000\n",
       "75%    2010102.5  16.5  22.000000  17.500000\n",
       "max    2010103.0  18.0  23.000000  19.000000"
      ]
     },
     "execution_count": 189,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 统计运算\n",
    "data.describe() # 统计每列的非空值个数、均值、标准差、最小值、分位数、最大值等信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 192,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    2010101\n",
       "1    2010102\n",
       "2    2010103\n",
       "dtype: int64"
      ]
     },
     "execution_count": 192,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# data.max()\n",
    "data.max(axis=1) # 按行获取最大值，默认按列，按行意义不大"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 193,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "date    2\n",
       "low     2\n",
       "high    0\n",
       "avg     2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 193,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.idxmax() # 每列最大值所在的索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 196,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    12\n",
       "1    27\n",
       "2    45\n",
       "Name: low, dtype: int64"
      ]
     },
     "execution_count": 196,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.low.cumsum() # 累计和，第二个是第一个和第二个值的和，第三个是前三个值的和，以此类推"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 240,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0xe3b3a48>"
      ]
     },
     "execution_count": 240,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAeZklEQVR4nO3dd3SUZf738fdF752EUEJAOiS0gA0VOyJFmmV1F8su6/n9nn18fruiFAVFBSzr2gvLquzq7iodu9LsoqCmUZIAgQAhoYcACcnM9fwx42+zLGUCM3PPPfN5ncNJptyZ7xkuPn6c3DOXsdYiIiLuU83pAURE5OwowEVEXEoBLiLiUgpwERGXUoCLiLhUjXA+WIsWLWxSUlI4H1JExPXWrVu311rb8sTrwxrgSUlJrF27NpwPKSLiesaYbSe7Xi+hiIi4lAJcRMSlFOAiIi6lABcRcSkFuIiISynARURcSgEuIuJSCnARkRDaf+Q4Dy3Lori0POg/O6xv5BERiRXWWpb8tJNH3ttA8bFyBnVqwVU94oP6GApwEZEg277vKFOXZPBFzl76JjZh9ugUurZqGPTHUYCLiARJucfLX77cyjPLs6lRrRozRvbk1vPbU72aCcnjKcBFRIIgLf8gkxZlsKGgmKt7xDNjZE8SGtcN6WMqwEVEzsGRsgr++Ek2b3y9lRYNavPKbf0Z0qtVWB5bAS4icpZWbizkwSVZ7Dx4jNsuSOS+Id1oVKdm2B5fAS4iUkVFh0t5+N31vJ9eQOe4Biy4+0JSk5qFfQ4FuIhIgLxeyztr85n5wQZKy7384eou/Pay86hVw5m31CjARUQCkFtUwpTFGXy3dT8DOzRj1uhkzmvZwNGZFOAiIqdxvMLLK59t5oWVudSpWY3HxyQzrn87qoXo1MCqUICLiJzC2rz9TF6UQU5RCcNSEpg2vAdxDes4Pdb/UoCLiJyguLScxz/cyFtrttOmSV1euz2VK7oF923wwaAAFxHxs9bycdZupi3NYm9JGXcN6sDvr+5C/dqRGZWROZWISJgVHDrGtKVZfLq+kB4JjZg7PpWUtk2cHuu0FOAiEtM8Xsub327jyY83UeH1Mvm6btw5qAM1q0f+p20rwEUkZm3cXczkRRn8uP0gl3RuwcxRybRrVs/psQIWUIAbY5oAc4FegAXuBDYBbwNJQB5wo7X2QEimFBEJotJyD8+vzOHVz7bQqG5NnrmpDyP7tMYY508NrIpAG/izwEfW2rHGmFpAPWAKsMJaO9sYMwmYBNwfojlFRILi69y9TFmcQd6+o4zt35apQ7vTtH4tp8c6K2cMcGNMI+BS4HYAa+1x4LgxZiQw2H+3ecBqFOAiEqEOHDnOzA82MH/dDto3r8dbvz6fizu1cHqscxJIA+8I7AFeN8b0BtYB9wDx1toCAGttgTEmLnRjioicHWsty9J2MePd9Rw6Vs5/DT6P/3tlZ+rUrO70aOcskACvAfQDfmetXWOMeRbfyyUBMcZMACYAJCYmntWQIiJnI3//UaYuyeTz7D30bteEN0cn0z2hkdNjBU0gAb4D2GGtXeO/vABfgBcaYxL87TsBKDrZwdbaOcAcgNTUVBuEmUVETqvC4+W1r7by9KfZVDeGh4b34JcXJoVsazOnnDHArbW7jTH5xpiu1tpNwJXAev+f8cBs/9elIZ1URCQAGTsOMWlROlm7irmqexwzRvaidZPQbm3mlEDPQvkd8Jb/DJQtwB1ANeAdY8xdwHZgXGhGFBE5syNlFfzp02xe+2orzRvU5uVb+zGkVyvXnRpYFQEFuLX2JyD1JDddGdxxRESqbtWmIh5YnMnOg8f4xfmJ3D+kG43rhm9rM6fonZgi4lp7Dpcx4731vJu2i05xDZh/94UMcGBrM6cowEXEday1zF+7g8c+2MCx4x7+56ou3D24I7VruP/UwKpQgIuIq2zZ49va7Nst+xmY
1IyZo3vRKa6h02M5QgEuIq5wvMLLq59t5vlVudSuUY1Zo5O5KTUytjZzigJcRCLeum0HmLwonezCEq5PSWD6sB7ENYqcrc2cogAXkYhVXFrOkx9t4s0120hoVIe5v0rlqh6Rt7WZUxTgIhKRfFubZVJ0uIzbL0riD9d0pUGEbm3mFD0bIhJRdh8qZfqyTD7OKqRbq4a8+stU+rSL7K3NnKIAF5GI4PVa3lqzjcc/2kS5x8v9Q7rx60vcsbWZUxTgIuK47MLDTFqYzg/bDzKoUwseG9WL9s3rOz1WxFOAi4hjSss9vLgql1c+20yD2jV4+sbejOrbJqo/vySYFOAi4ohvNu9jyuIMtu49wui+bZh6fXeaN6jt9FiuogAXkbA6ePQ4sz7YyNtr80lsVo+/3TWQSzq3dHosV1KAi0hYWGt5N72AGe9mceBoOXdfdh73XNmZurVi6/NLgkkBLiIhl7//KA8uzWT1pj2ktG3MvDsH0rN1Y6fHcj0FuIiETIXHyxtf5/HHT7IxBqYN68H4i6JvazOnKMBFJCQydx5i8qIMMnYe4opucTxyQy/aROnWZk5RgItIUB09XsEzy3P4y5dbaVqvFi/+oh9Dk6N7azOnKMBFJGg+y97DA0syyN9/jFsGtmPSkO40rhf9W5s5RQEuIudsb0kZj763niU/7aJjy/q8PeECzu/Y3Omxop4CXETOmrWWBet8W5sdKavgnis781+XnxdzW5s5RQEuImclb+8RpizO4OvN+0ht35RZo5PpHB+bW5s5RQEuIlVS7vEy5/MtPLcih1rVq/HYqF7cMiAxprc2c4oCXEQC9uP2A0xelMHG3YcZmtyK6cN7Eq+tzRyjABeRMyopq+Cpjzcx75s84hvW4c+/SuVqbW3mOAW4iJzWp+sLmbY0k93FpYy/MIk/XNOFhnV0amAkUICLyEkVFZcyfVkWH2buplurhrx0az/6JjZ1eiypRAEuIv/G67X84/vtzP5wI2UVXiZe25UJl3bU1mYRSAEuIv8rp/AwkxdlsHbbAS46rzmPjUqmQwttbRapFOAiQmm5h5dWb+bl1bnUr12Dp8b1Zkw/bW0W6RTgIjFuzZZ9TF6cwZY9R7ihT2seGNaDFtrazBUU4CIx6tDRcmZ9uIF/fp9P26Z1mXfnQC7roq3N3EQBLhJjrLW8n1HAQ8vWc+DocX57aUfuuaoz9WopDtxGf2MiMWTnwWM8uCSTlRuLSG7TmDfuGECvNtrazK0U4CIxwOO1zPs6j6c+2YS18MD13bn9oiRq6NRAV1OAi0S5rF2+rc3SdxxicNeWPDKyF+2a1XN6LAkCBbhIlDp23MMzK7KZ+8VWmtaryXO39GV4SoJODYwiCnCRKPRFzh6mLs5k+/6j3JTajslDu9GkXi2nx5IgU4CLRJF9JWU89v4GFv24k44t6vOP31zAhedpa7NopQAXiQLWWhb9sJNH31/P4dIKfndFJ/778k7UqamtzaJZQAFujMkDDgMeoMJam2qMaQa8DSQBecCN1toDoRlTRE5l274jTF2cyZe5e+mX2IRZo1Po2kpbm8WCqjTwy621eytdngSssNbONsZM8l++P6jTicgplXu8zP1iK88sz6Zm9Wo8ckMvbh2orc1iybm8hDISGOz/fh6wGgW4SFj8lH+QSQvT2bj7MNf2jOfhEb1o1Vhbm8WaQAPcAp8YYyzwqrV2DhBvrS0AsNYWGGPiTnagMWYCMAEgMTExCCOLxK6Ssgr++Mkm3vg6j7iGtXnltv4M6dXK6bHEIYEG+MXW2l3+kP7UGLMx0Afwh/0cgNTUVHsWM4oIsGJDIQ8uyaSguJRfXtCeidd21dZmMS6gALfW7vJ/LTLGLAYGAoXGmAR/+04AikI4p0jMKiou5eF31/N+RgFd4huw4BcX0b+9tjaTAALcGFMfqGatPez//hpgBrAMGA/M9n9dGspBRWKN12t5e20+Mz/YQFmFl3uv6cKES8+jVg19fon4BNLA
44HF/rff1gD+bq39yBjzPfCOMeYuYDswLnRjisSW3KISpizK4Lu8/VzQsRkzRyXTsWUDp8eSCHPGALfWbgF6n+T6fcCVoRhKJFaVVXh4efVmXlq1mbq1qvPE2BTG9W+rzy+Rk9I7MUUixPd5+5m8KIPcohJG9G7NtOHa2kxOTwEu4rBDx8p5/KON/H3Ndto0qcvrdwzg8q4nPStX5N8owEUcYq3lw8zdTF+Wxb6SMn5zSQf+5+ou2tpMAqaVIuKAXQePMW1pFss3FNKzdSNeGz+A5Lba2kyqRgEuEkYer+Vv3+Tx5Meb8FqYOrQ7d1ysrc3k7CjARcJkQ0ExkxZlkJZ/kEu7tOSxG7S1mZwbBbhIiJWWe3huRQ5zPt9C47o1efbmPozo3VqnBso5U4CLhNBXuXuZsjiDbfuOMq5/W6YM7U7T+traTIJDAS4SAgeOHOfR9zew8IcdJDWvx99/fT4XdWrh9FgSZRTgIkFkrWXpT7uY8d56io+V89+Xn8fvruisrc0kJBTgIkGyfd9Rpi7J4IucvfRp14TZY5Lp1qqR02NJFFOAi5yjCo+Xv3y5lT8tz6ZGtWrMGNmTW89vT3VtbSYhpgAXOQfpOw4yaWEG6wuKuap7PI/c0JOExnWdHktihAJc5CwcKavg6U+zef2rrbRoUJtXbuvHtT1b6dRACSsFuEgVrdpYxANLMtl58Bi3XZDIfUO60Uhbm4kDFOAiASo6XMqMd9fzXnoBneIasODuC0lNaub0WBLDFOAiZ2Ct5Z21+Tz2/gZKy738/uou/PayjtSuoVMDxVkKcJHT2LzHt7XZmq37GdihGbNGJ3OetjaTCKEAFzmJ4xVeXvlsMy+szKVOzWrMHp3MjantqKZTAyWCKMBFTrBu234mLcwgp6iEYSkJTBveg7iGdZweS+Q/KMBF/IpLy3nio428+a1va7PXbk/lim7xTo8lckoKcBHgo8wCpi3NYm9JGXcN6sDvr+5C/dr65yGRTStUYtrekjKmLc3kg4zd9EhoxNzxqaS0beL0WCIBUYBLTLLW8l56AdOWZnKkzMN9Q7rym0s6UlNbm4mLKMAl5uw5XMaDSzL5KGs3vds14amxKXSOb+j0WCJVpgCXmGGt5d30AqYvzeTIcQ+TruvGrwd10IbC4loKcIkJew6X8cCSDD7OKqR3uyb8cVwKneLUusXdFOAS1ay1LEvbxfRlWRw97mHydd24S61booQCXKJW0eFSHlicySfrC+nTrglPqXVLlFGAS9Q5sXVPGdqNuwZ11A45EnUU4BJViopLmbokk0/XF9I3sQlPju1Npzh9+JREJwW4RIWfd4OfviyL0nIPU4d2585BHdS6JaopwMX1iopLmbI4k+UbCumX2IQnx/XWR75KTFCAi2tZa1ny004eWrae0nIPD1zfnTsuVuuW2KEAF1cqLC5l6uIMlm8oon/7pjw5NoWOat0SYxTg4irWWhb/uJOHlmVRVuFV65aYpgAX1ygsLmXKogxWbCwitX1TnlDrlhinAJeIZ61l0Q87efjdLI57vDw4rAe3X5Sk1i0xTwEuEW33oVKmLM5g5cYiBiQ15YmxvenQor7TY4lEhIAD3BhTHVgL7LTWDjPGNAPeBpKAPOBGa+2BUAwpscday4J1O5jx3nrKPV6m+Vu3NhUW+ZeqfKLPPcCGSpcnASustZ2BFf7LIuds96FS7nzjeyYuSKdbq4Z8eM+l3Dmog8Jb5AQBNXBjTFvgeuAx4Pf+q0cCg/3fzwNWA/cHdzyJJSe27unDezD+QrVukVMJ9CWUZ4D7gMof5RZvrS0AsNYWGGPiTnagMWYCMAEgMTHxHEaVaFZw6BiTF2WwetMeBnZoxhNjUkjSa90ip3XGADfGDAOKrLXrjDGDq/oA1to5wByA1NRUW+UJJapZa5m/dgePvLeeCq/loeE9+JVat0hAAmngFwMjjDFDgTpAI2PMm0ChMSbB374TgKJQDirRp+DQMSYtzOCzbF/r
fnJsCu2bq3WLBOqMv8S01k621ra11iYBNwMrrbW3AcuA8f67jQeWhmxKiSrWWt75Pp9rnv6c77bu5+ERPfnnby5QeItU0bmcBz4beMcYcxewHRgXnJEkmu06eIxJizL4PHsP53doxpNje5PYvJ7TY4m4UpUC3Fq7Gt/ZJlhr9wFXBn8kiUbWWt7+Pp9H39+A11pmjOzJbee312vdIudA78SUkNt58BiTFqbzRc5eLujYjCfGqHWLBIMCXELGWss/v8/nMX/rfmRkT25V6xYJGgW4hETl1n1hx+Y8MTaFds3UukWCSQEuQWWt5R/f5TPzA3/rvqEXtw5MVOsWCQEFuATNjgNHmbQwgy9z93LRec15fIxat0goKcDlnFlr+ft325n5vu+zzh69oRe/UOsWCTkFuJyTyq374k7NmT1arVskXBTgclastby1ZjuzPvC17sdG+Vq3MWrdIuGiAJcqy99/lPsXpvP15n0M6tSC2WOSadtUrVsk3BTgEjCv1/LWd77WbYCZo5K5ZWA7tW4RhyjAJSD5+49y34J0vtmyj0s6t2DWaLVuEacpwOW0vF7LW2u2MevDjVQzhlmjk7l5gFq3SCRQgMspndi6Z49JoU2Tuk6PJSJ+CnD5D16v5c0125jtb92zRydzk1q3SMRRgMu/2b7vKBMXpLFm634u7dKSWaOT1bpFIpQCXABf6/7bt77WXaOa4fExydyYqtYtEskU4MK2fUe4b0E6a7bu5zJ/626t1i0S8RTgMczrtfz1mzwe/2gTNaoZnhiTwrjUtmrdIi6hAI9ReXuPcN/CdL7zt+7ZY5JJaKzWLeImCvAY4/Va5n2Tx+MfbaRm9Wo8MTaFcf3VukXcSAEeQ/L2+l7r/i5vP5d3bcnM0WrdIm6mAI8BXq/lja/zeOJjX+t+alxvxvRro9Yt4nIK8Ci3de8R7luQxvd5B7i8a0tmjU6hVeM6To8lIkGgAI9SHn/rflKtWyRqKcCj0Na9R5g4P4212w5wRbc4Zo5KVusWiUIK8Cji8Vpe/2orT368ido1qvH0jb0Z1VetWyRaKcCjxJY9JUxckM66bQe4slscM0cnE99IrVskminAXU6tWyR2KcBdbPOeEu7zt+6ruvte645T6xaJGQpwF/J4La99uZWnPtlEnZrVeeamPozs01qtWyTGKMBdJreohIkL0vhx+0Gu6h7PzFG91LpFYpQC3CU8XstfvtzCU59kU1etW0RQgLtC5dZ9dY94HhvVi7iGat0isU4BHsE8XsvcL7bwx0+zqVerOs/e3IcRvdW6RcRHAR6hcosOc+/8dH7KP8g1PeJ5VK1bRE6gAI8wFR4vc7/cytNq3SJyBgrwCJJTeJh7F6STln+Qa3vG8+gNybRsWNvpsUQkQinAI0CFx8ucL7bwzKc51K9dnedv6cuwlAS1bhE5LQW4w3IKD3Pv/DTSdhxiSM9WPHJDL7VuEQnIGQPcGFMH+Byo7b//AmvtdGNMM+BtIAnIA2601h4I3ajRRa1bRM5VIA28DLjCWltijKkJfGmM+RAYDayw1s42xkwCJgH3h3DWqJHtb93pOw5xXS9f627RQK1bRKrmjAFurbVAif9iTf8fC4wEBvuvnwesRgF+WhUeL69+voVnl+fQoE4NXvxFP65PSXB6LBFxqYBeAzfGVAfWAZ2AF621a4wx8dbaAgBrbYExJu4Ux04AJgAkJiYGZ2oX2rTb17ozdh7i+uQEHh7ZU61bRM5JQAFurfUAfYwxTYDFxphegT6AtXYOMAcgNTXVntWULla5dTdU6xaRIKrSWSjW2oPGmNXAEKDQGJPgb98JQFEoBnSzjbuLmTg/3de6UxKYMaInzdW6RSRIAjkLpSVQ7g/vusBVwOPAMmA8MNv/dWkoB3WTco+XV1Zv5rmVOTSqU5OXbu3H0GS1bhEJrkAaeAIwz/86eDXgHWvte8aYb4B3jDF3AduBcSGc0zU2FBQzcUEamTuLGZaSwMNq3SISIoGc
hZIO9D3J9fuAK0MxlBuVe7y8vHozz6/MoXHdmrx8az+uU+sWkRDSOzGDYENBMffOTyNrVzHDe7fm4RE9aVa/ltNjiUiUU4Cfg3KPl5dWbeaFVb7W/cpt/RjSS61bRMJDAX6W1u/yte71BcWM6N2ah9S6RSTMFOBVVO7x8uKqXF5YmUuTerV45bb+DOnVyumxRCQGKcCrIGvXISbOT2d9QTEj+7TmoeE9aarWLSIOUYAH4HiFr3W/uMrXul/9ZX+u7anWLSLOUoCfQdauQ9w7P50NBcXc0Kc109W6RSRCKMBP4XiFlxdW5fLSqlya1q/FnF/25xq1bhGJIArwk8jceYh756excfdhRvVtw/ThPWhST61bRCKLAryS4xVeXliZw0urN9O0fi3+/KtUru4R7/RYIiInpQD3q9y6R/dtwzS1bhGJcDEf4GUVHl5YmctLqzfTvH4t5v4qlavUukXEBWI6wDN2+Fr3psLDjO7XhunDetK4Xk2nxxIRCUhMBnhZhYfnV+Ty8me+1v2X8alc2V2tW0TcJeYCPH3HQSbOT2dT4WHG9GvLtGE91LpFxJViJsDLKjw8tyKHVz7bQosGtXjt9lSu6KbWLSLuFRMBnr7jIPfOTyO7sISx/dvy4LAeNK6r1i0i7hbVAV5W4eHZ5Tm8+vkWWjaozeu3D+DybnFOjyUiEhRRG+Bp+b7WnVNUwrj+bXlArVtEokzUBXhpuYdnV+Tw6mebiWtYh9fvGMDlXdW6RST6RFWA/5R/kIn+1n1jqq91N6qj1i0i0SkqAry03MMzy3OY8/lm4hvV4Y07BjBYrVtEopzrA/zH7QeYuCCd3KISbkptx9Rh3dW6RSQmuDbAS8s9/Gl5Nn/+fAvxjeow786BXNalpdNjiYiEjSsD/IftB5g4P43Ne45w84B2TLlerVtEYo+rAry03MOfPs3mz19soVWjOvz1zoFcqtYtIjHKNQH+w/YD3Ds/jS17jnDLwHZMGdqdhmrdIhLDXBHgz6/I4U/Ls9W6RUQqcUWAt29Rn5sGJDJlaDe1bhERP1cE+IjerRnRu7XTY4iIRJRqTg8gIiJnRwEuIuJSCnAREZdSgIuIuJQCXETEpRTgIiIupQAXEXEpBbiIiEsZa234HsyYPcC2szy8BbA3iOMEi+aqGs1VNZqraiJ1Lji32dpba//jM0TCGuDnwhiz1lqb6vQcJ9JcVaO5qkZzVU2kzgWhmU0voYiIuJQCXETEpdwU4HOcHuAUNFfVaK6q0VxVE6lzQQhmc81r4CIi8u/c1MBFRKQSBbiIiEtFRIAbY4YYYzYZY3KNMZNOcrsxxjznvz3dGNMv0GNDPNet/nnSjTFfG2N6V7otzxiTYYz5yRizNsxzDTbGHPI/9k/GmGmBHhviuSZWminTGOMxxjTz3xaS58sY85oxpsgYk3mK251aW2eay6m1daa5nFpbZ5or7GvL/7PbGWNWGWM2GGOyjDH3nOQ+oVtj1lpH/wDVgc1AR6AWkAb0OOE+Q4EPAQNcAKwJ9NgQz3UR0NT//XU/z+W/nAe0cOj5Ggy8dzbHhnKuE+4/HFgZhufrUqAfkHmK28O+tgKcK+xrK8C5wr62ApnLibXl/9kJQD//9w2B7HDmVyQ08IFArrV2i7X2OPBPYOQJ9xkJ/NX6fAs0McYkBHhsyOay1n5trT3gv/gt0DZIj31Oc4Xo2GD/7FuAfwTpsU/JWvs5sP80d3FibZ1xLofWViDP16k4+nydICxrC8BaW2Ct/cH//WFgA9DmhLuFbI1FQoC3AfIrXd7Bfz4Bp7pPIMeGcq7K7sL3X9mfWeATY8w6Y8yEIM1UlbkuNMakGWM+NMb0rOKxoZwLY0w9YAiwsNLVoXq+zsSJtVVV4VpbgQr32gqYk2vLGJME9AXWnHBTyNZYJGxqbE5y3YnnNp7qPoEce7YC/tnGmMvx/SMbVOnqi621u4wxccCnxpiN/hYRjrl+wPfZCSXGmKHAEqBzgMeGcq6f
DQe+stZWblSher7OxIm1FbAwr61AOLG2qsKRtWWMaYDvPxr/z1pbfOLNJzkkKGssEhr4DqBdpcttgV0B3ieQY0M5F8aYFGAuMNJau+/n6621u/xfi4DF+P53KSxzWWuLrbUl/u8/AGoaY1oEcmwo56rkZk74X9wQPl9n4sTaCogDa+uMHFpbVRH2tWWMqYkvvN+y1i46yV1Ct8ZC8cJ+FX8JUAPYAnTgXy/k9zzhPtfz778E+C7QY0M8VyKQC1x0wvX1gYaVvv8aGBLGuVrxrzdpDQS2+587R58v//0a43sts344ni//z0zi1L+UC/vaCnCusK+tAOcK+9oKZC4H15YB/go8c5r7hGyNBe3JPccnYSi+395uBqb6r7sbuLvSk/Si//YMIPV0x4ZxrrnAAeAn/5+1/us7+v8y0oAsB+b6P/7HTcP3C7CLTndsuObyX74d+OcJx4Xs+cLXxgqAcnyN564IWVtnmsuptXWmuZxaW6edy4m15f/5g/C97JFe6e9qaLjWmN5KLyLiUpHwGriIiJwFBbiIiEspwEVEXEoBLiLiUgpwERGXUoCLiLiUAlxExKX+P9PuJflQ+0R7AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "data.high.cumsum().plot() # 可以直接画图，默认折线图 ， 最终是使用matplotlib\n",
    "# data.plot(x='date') \n",
    "# help(data.high.plot)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 237,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    11\n",
       "1     6\n",
       "2     2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 237,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 自定义函数\n",
    "data.apply(lambda x : x.max() - x.min()) #默认按照列, 每列最大值减去最小值\n",
    "data[['low','high']].apply(lambda x : x.high - x.low, axis=1) # 按照行，最高气温减去最低气温"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 243,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0xe45beb0>"
      ]
     },
     "execution_count": 243,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAE5CAYAAACJTnubAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAVVUlEQVR4nO3dfZSedX3n8ffHgAmVFCUJgRLCgFIRcIk1i4+JYX3CqkfYXWtztgqHLmGPeKysYtkux432QUtxi+6ptnFRtLoVRF1pRUDYtcBKC4GFNYQolo0SjRCTgNoal8B3/5gryTBMmJl7Hu78Mu/XOXPmvq+H+/re88355De/a+7rSlUhSWrP0/pdgCSpNwa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBrn1Kko1JXtXvOiYqyeokn+13Hdq/GeDSOCU5oN81SGCAax+S5C+BxcBfJ/lZkvcmeXGSbyZ5OMndSVYM2f4bSf6gW/+zJH+dZF6SzyX5SZLbkwwM2b6SvDPJ/Ul+nORPkjxtyPqzk9ybZHuS65IcPWzf85LcB9zXLftIkge6Y92RZFm3/DTg94C3dHXd3S0/JMllSTYn+UFX+6yp/JlqP1dVfvm1z3wBG4FXdY+PBLYCv87gYOPV3fMF3fpvAN8Fng0cAqwHvgO8CjgA+AzwqSGvXcD/BA5l8D+K7wD/tlt3evdaz+v2vQj45rB9v97te1C37LeAed327wZ+BMzp1q0GPjvsvf134C+AZwCHAbcB5/b7Z+5Xu1+OwLUv+y3gmqq6pqoer6qvA2sZDPRdPlVV/1BVjwBfA/6hqm6oqp3AF4AXDHvNP66qbVX1feBSYGW3/Fzgg1V1b7fvHwFLho7Cu/XbqurnAFX12araWlU7q+rDwGzguSO9kSQLgdcB76qqf6yqh4A/BX6z1x+OZIBrX3Y08OZu+uThJA8DLweOGLLNg0Me/3yE5wcPe80Hhjz+HvArQ471kSHH2QaEwd8CRtqXJO/uplwe6fY5BJj/FO/lQGDzkGP8BYMjcaknnozRvmbo5TEfAP6yqs6ZxNc/Crine7wY+OGQY/1hVX1uLLV1892/C7wSuKeqHk+yncHQf8K2Q17/F8D8boQvTZgjcO1rHgSO7R5/FnhjktcmmZVkTpIVSRZN4PUvSPKsJEcBvwNc0S3/c+A/JDkRdp9wfPNTvM5cYCewBTggyfuAXx72PgZ2nSStqs3A9cCHk/xykqcleXaSV0zgvWiGM8C1r/kgcFE3xfAW4E0M/kXHFgZHsRcwsX+3XwHuAO4CvgpcBlBVXwb+GPh8kp8A6xics96b6xicc/8Og1MxO3jiFMsXuu9bk9zZPX4b8HQGT7ZuB67iidNB0rikyhs6aGZIUsBxVfXdftciTQZH4JLUKANckhrlFIokNcoRuCQ1ygCXpEZN6wd55s+fXwMDA9N5SElq3h133PHjqlowfPm0BvjAwABr166dzkNKUvOSfG+k5U6hSFKjDHBJapQBLkmN8mqEkprw6KOPsmnTJnbs2NHvUqbMnDlzWLRoEQceeOCYtjfAJTVh06ZNzJ07l4GBAZKMvkNjqoqtW7eyadMmjjnmmDHt4xSKpCbs2LGDefPm7ZfhDZCEefPmjes3DANcUjP21/DeZbzvzwCXpDE6+ODhd+jrL+fAJ9PqQ6b5eI9M7/GkfcjAhV+d1Nfb+KHXT+rrTQdH4JI0TlXFBRdcwEknncTzn/98rrhi8M58b3/727n66qsBOOOMMzj77LMBuOyyy7jooosmvQ4DXJLG6Utf+hJ33XUXd999NzfccAMXXHABmzdvZvny5dx8880A/OAHP2D9+vUA3HLLLSxbtmzS6zDAJWmcbrnlFlauXMmsWbNYuHAhr3jFK7j99ttZtmwZN998M+vXr+eEE05g4cKFbN68mVtvvZWXvvSlk16Hc+CSNE57uxHOkUceyfbt
27n22mtZvnw527Zt48orr+Tggw9m7ty5k16HI3BJGqfly5dzxRVX8Nhjj7FlyxZuuukmTjnlFABe8pKXcOmll7J8+XKWLVvGJZdcMiXTJ+AIXJLG7YwzzuDWW2/l5JNPJgkXX3wxhx9+OADLli3j+uuv5znPeQ5HH30027Ztm7IAn9Z7Yi5durT26+uB+2eE0pS59957ed7zntfvMqbcSO8zyR1VtXT4tk6hSFKjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JI0Rhs3buSkk0560vL3ve993HDDDU+57+rVq7nkkksmtR4/yCOpTZP9uYsJfK7iAx/4wCQWMnaOwCVpHB577DHOOeccTjzxRF7zmtfw85//nLPOOourrroKgGuuuYbjjz+el7/85bzzne/kDW94w+59169fz4oVKzj22GP56Ec/OuFaDHBJGof77ruP8847j3vuuYdnPvOZfPGLX9y9bseOHZx77rl87Wtf45ZbbmHLli1P2HfDhg1cd9113Hbbbbz//e/n0UcfnVAtTqFI4GUQNGbHHHMMS5YsAeCFL3whGzdu3L1uw4YNHHvssbvvKr9y5UrWrFmze/3rX/96Zs+ezezZsznssMN48MEHWbRoUc+1OAKXpHGYPXv27sezZs1i586du5+Pdm2pp9q3Fwa4JE2S448/nvvvv3/3qHzXrdamilMokjRJDjroID72sY9x2mmnMX/+/N3XCJ8qBrikNvXhPMLAwADr1q3b/fw973nPk7Y59dRT2bBhA1XFeeedx9Klg1eBXb169RO2G/o6vXIKRZIm0Sc+8QmWLFnCiSeeyCOPPMK55547ZccadQSe5CjgM8DhwOPAmqr6SJJDgSuAAWAj8BtVtX3KKpWkBpx//vmcf/7503KssYzAdwLvrqrnAS8GzktyAnAhcGNVHQfc2D2XJE2TUQO8qjZX1Z3d458C9wJHAm8CPt1t9mng9KkqUpJg9D/Ta91439+4TmImGQBeAPw9sLCqNncH3ZzksL3sswpYBbB48eJxFSdJu8yZM4etW7cyb948kjxx5Q//9/QW8ysvmPSXrCq2bt3KnDlzxrzPmAM8ycHAF4F3VdVPnvQD3HtRa4A1MHhT4zFXJklDLFq0iE2bNj3p4+kAPPzQ9BbzyL1T8rJz5swZ1yczxxTgSQ5kMLw/V1Vf6hY/mOSIbvR9BDDNP0FJM8mBBx64+yPqT7L6xdNbzD5yKYRR58AzONS+DLi3qv7zkFVXA2d2j88EvjL55UmS9mYsI/CXAW8FvpXkrm7Z7wEfAq5M8tvA94E3T02JkqSRjBrgVXULsLcJ71dObjmSpLHyk5iS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJatS47sgjSWMxcOFXp/V4G8d+E5v9iiNwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckho1aoAn+WSSh5KsG7JsdZIfJLmr+/r1qS1TkjTcWEbglwOnjbD8T6tqSfd1zeSWJUkazagBXlU3AdumoRZJ0jgcMIF935HkbcBa4N1VtX2kjZKsAlYBLF68eAKH00wycOFXp/V4G+dM6+GkSdHrScyPA88GlgCbgQ/vbcOqWlNVS6tq6YIFC3o8nCRpuJ4CvKoerKrHqupx4BPAKZNbliRpND0FeJIjhjw9A1i3t20lSVNj1DnwJH8FrADmJ9kE/CdgRZIlQAEbgXOnsEZJ0ghGDfCqWjnC4sumoBZJ0jj4SUxJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXK
AJekRo16V/qWDVz41Wk93sY503o4STOcI3BJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1KhRAzzJJ5M8lGTdkGWHJvl6kvu678+a2jIlScONZQR+OXDasGUXAjdW1XHAjd1zSdI0GjXAq+omYNuwxW8CPt09/jRw+iTXJUkaRa9z4AurajNA9/2wvW2YZFWStUnWbtmypcfDSZKGm/KTmFW1pqqWVtXSBQsWTPXhJGnG6DXAH0xyBED3/aHJK0mSNBa9BvjVwJnd4zOBr0xOOZKksRrLnxH+FXAr8Nwkm5L8NvAh4NVJ7gNe3T2XJE2jA0bboKpW7mXVKye5FknSOPhJTElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ16oCJ7JxkI/BT4DFgZ1UtnYyiJEmjm1CAd06tqh9PwutIksbBKRRJatREA7yA65PckWTVSBskWZVkbZK1W7ZsmeDhJEm7TDTAX1ZVvwa8DjgvyfLhG1TVmqpaWlVLFyxYMMHDSZJ2mVCAV9UPu+8PAV8GTpmMoiRJo+s5wJM8I8ncXY+B1wDrJqswSdJTm8hfoSwEvpxk1+v8t6q6dlKqkiSNqucAr6r7gZMnsRZJ0jj4Z4SS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJatSEAjzJaUm+neS7SS6crKIkSaPrOcCTzAL+DHgdcAKwMskJk1WYJOmpTWQEfgrw3aq6v6r+H/B54E2TU5YkaTQHTGDfI4EHhjzfBLxo+EZJVgGruqc/S/LtCRxznxaYD/x42g74/kzbofZ39q5tM6B/R4+0cCIBPtI7qCctqFoDrJnAcZqRZG1VLe13HRo/e9e2mdq/iUyhbAKOGvJ8EfDDiZUjSRqriQT47cBxSY5J8nTgN4GrJ6csSdJoep5CqaqdSd4BXAfMAj5ZVfdMWmVtmhFTRfspe9e2Gdm/VD1p2lqS1AA/iSlJjTLAJalRBrgkNcoAl6RGGeCTLMmMPBvekiSzkpyb5PeTvGzYuov6VZfGJskvJXlvkguSzElyVpKrk1yc5OB+1zed/CuUHiQ5dG+rgLuratF01qPxSfJfgV8CbgPeCvxtVf37bt2dVfVr/axPTy3JlQxexuMg4LnAvcCVwBuBw6vqrX0sb1oZ4D1I8hjwPZ54OYHqnh9ZVU/vS2EakyT/p6r+Wff4AOBjDF5LYyXwd1X1gn7Wp6eW5K6qWpIkwGbgiKqq7vndu3o7E0zkWigz2f3AK6vq+8NXJHlghO21b9n9H2xV7QRWJXkf8D+AGfUreMu60L6mulFo93xGjUidA+/NpcCz9rLu4uksRD1Zm+S0oQuq6gPAp4CBvlSk8Vi7a667qs7etTDJs4Gf9q2qPnAKRdJ+I0lqBoWaUyg9SnIIcBqD10UvBq/EeF1VPdzXwjQm9q9t9m+QUyg9SPI24E5gBYN/zfAM4FTgjm6d9mH2r232bw+nUHrQ3VXoRcP/t0/yLODvq+pX+1OZxsL+tc3+7eEIvDdhhLsPAY8z8p2KtG+xf22zfx3nwHvzh8CdSa5nz31BFwOvBn6/b1VprOxf2+xfxymUHnW/rr2WwZMo
YfAWc9dV1fa+FqYxsX9ts3+DDHBJapRz4JMsybf6XYN6Z//aNtP65xx4D5L8y72tAg6fzlo0fvavbfZvDwO8N1cAn2PkM+FzprkWjZ/9a5v96zgH3oMkdwBnVtW6EdY9UFVH9aEsjZH9a5v928M58N68C/jJXtadMZ2FqCf2r232r+MIXJIa5Rx4j5K8FjidJ15M5ytVdW1fC9OY2L+22b9BjsB7kORS4FeBzzD4AQKARcDbgPuq6nf6VZtGZ//aZv/2MMB7kOQ7I10wp7ul03eq6rg+lKUxsn9ts397eBKzNzuSnDLC8n8O7JjuYjRu9q9t9q/jHHhvzgI+nmQue36FO4rBM+Nn9akmjd1Z2L+WnYX9A5xCmZAkhzPkYjpV9aM+l6RxsH9ts38G+KRLcnxVbeh3HeqN/WvbTOufAT7Jkny/qhb3uw71xv61bab1zznwHiT56N5WAc+czlo0fvavbfZvD0fgPUjyU+DdwC9GWP3hqpo/zSVpHOxf2+zfHo7Ae3M7sK6qvjl8RZLV01+Oxsn+tc3+dRyB9yDJocCOqvqnftei8bN/bbN/exjgktQoP4nZgySHJPlQkg1JtnZf93bLZtRJlBbZv7bZvz0M8N5cCWwHVlTVvKqaB5zaLftCXyvTWNi/ttm/jlMoPUjy7ap67njXad9g/9pm//ZwBN6b7yV5b5KFuxYkWZjkd4EH+liXxsb+tc3+dQzw3rwFmAf8bZLtSbYB3wAOBX6jn4VpTOxf2+xfxymUHiU5nsGLyP9dVf1syPLTZtpdQVpk/9pm/wY5Au9BkncCXwHeAaxL8qYhq/+oP1VprOxf2+zfHn4SszfnAC+sqp8lGQCuSjJQVR9h8HoM2rfZv7bZv44B3ptZu35tq6qNSVYw+I/oaGbYP6BG2b+22b+OUyi9+VGSJbuedP+Y3gDMB57ft6o0Vvavbfav40nMHiRZBOwc6Q4gSV5WVf+rD2VpjOxf2+zfHga4JDXKKRRJapQBLkmNMsA1oyRZneQ9T7H+9CQnTGdNUq8McOmJTgcMcDXBk5ja7yX5j8DbGLzQ0RbgDuARYBXwdOC7wFuBJcDfdOseAf5V9xJ/BiwA/gk4p6o2TGf90t4Y4NqvJXkhcDnwIgY/uHYn8OfAp6pqa7fNHwAPVtV/SXI58DdVdVW37kbg31XVfUleBHywqv7F9L8T6cn8JKb2d8uAL++6f2KSq7vlJ3XB/UzgYOC64TsmORh4KfCFZPcH/GZPecXSGBngmglG+jXzcuD0qro7yVnAihG2eRrwcFUtGWGd1HeexNT+7ibgjCQHJZkLvLFbPhfYnORA4N8M2f6n3Tqq6ifA/03yZoAMOnn6SpeemnPg2u8NOYn5PWATsB74R+C93bJvAXOr6qwkLwM+AfwC+NfA48DHgSOAA4HPV9UHpv1NSCMwwCWpUU6hSFKjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhr1/wECeVD3y++nJQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# data.plot(x='date', y='high', kind='bar', title='temperate') # 柱状图\n",
    "data.plot(x='date', y=['low','high'], kind='bar', title='temperate') # 柱状图"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 249,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2011</th>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010</th>\n",
       "      <td>15</td>\n",
       "      <td>21</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2020</th>\n",
       "      <td>18</td>\n",
       "      <td>20</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      low  high  avg\n",
       "2011   12    23   14\n",
       "2010   15    21   16\n",
       "2020   18    20   19"
      ]
     },
     "execution_count": 249,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 文件读取，支持csv，hdf5, json，html等\n",
    "data = pd.read_csv('demo_pandas.csv')  # 该文件与demo.csv的差别是：标题行只有3个值（low、high、avg），没有date这一列的标题，因此第一列（date列）自动变为索引\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 266,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "21"
      ]
     },
     "execution_count": 266,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# data.index\n",
    "# data.columns\n",
    "data['low'][2010]  # 列行定位\n",
    "data.loc[2010,'low'] # 行列定位\n",
    "data.iloc[1,1] # 索引定位"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 272,
   "metadata": {},
   "outputs": [],
   "source": [
    "# data[:2].to_csv('demo_out.csv', columns=['high']) # 保存前两行数据的high列到csv文件\n",
    "# help(data.to_csv) # 更多参数参考文档\n",
    "data[:2].to_csv('demo_out.csv', columns=['high'], index_label='year') # 保存前两行数据的high列到csv文件, 并设置index列名称，默认没有"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 281,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hdf5二进制文件读写， 支持压缩，节省磁盘，跨平台\n",
    "# hdf5文件是k-v键值对的集合，key为存储key, value对应DataFrame\n",
    "# help(data.to_hdf)\n",
    "data.avg.to_hdf('demo_out.h5', key='avg', mode='w') #  存储avg列到hdf5文件，key为avg；这里用mode='w'覆盖写入，默认是mode='a'追加模式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 283,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2011    14\n",
       "2010    16\n",
       "2020    19\n",
       "Name: avg, dtype: int64"
      ]
     },
     "execution_count": 283,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hd = pd.read_hdf('demo_out.h5') # 如果hdf文件中只有一个key默认就读那个key\n",
    "hd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 286,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2011    14\n",
       "2010    16\n",
       "2020    19\n",
       "Name: avg, dtype: int64"
      ]
     },
     "execution_count": 286,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.avg.to_hdf('demo_out.h5',key='avg2') # 默认是a 追加，追加一个key\n",
    "hd = pd.read_hdf('demo_out.h5', key='avg') # 此时多个key,不指定key,报错\n",
    "hd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 287,
   "metadata": {},
   "outputs": [],
   "source": [
    "# json文件读写\n",
    "data.to_json('demo_json.txt', orient='records', lines=True) # orient='records'为记录（按行）格式；lines=True表示每行写一条JSON记录"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 291,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>15</td>\n",
       "      <td>21</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>18</td>\n",
       "      <td>20</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   low  high  avg\n",
       "0   12    23   14\n",
       "1   15    21   16\n",
       "2   18    20   19"
      ]
     },
     "execution_count": 291,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# help(pd.read_json)\n",
    "jd = pd.read_json('demo_json.txt', orient='records', lines=True) # 读取json, 每行一条记录\n",
    "jd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 317,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2010102</td>\n",
       "      <td>15.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      date   low  high  avg\n",
       "1  2010102  15.0  21.0   16"
      ]
     },
     "execution_count": 317,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 缺失值判断\n",
    "data = pd.read_csv('demo_nan.csv')\n",
    "data\n",
    "pd.isnull(data) # 空值判断\n",
    "pd.notnull(data)\n",
    "pd.isna(data) # na判断\n",
    "pd.isnull(data).any() # 哪列存在缺失值\n",
    "np.any(pd.isnull(data)) # 是否有空值 等于上面操作\n",
    "pd.isnull(data).all() # \n",
    "np.all(pd.isnull(data)) # 是否都是空值 等于上面操作\n",
    "\n",
    "# 删除缺失值\n",
    "d2 = data.dropna() # 返回删除后的集合,参数inplace=True则直接在原集合修改\n",
    "d2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 318,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2010101</td>\n",
       "      <td>12.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2010102</td>\n",
       "      <td>15.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010103</td>\n",
       "      <td>13.5</td>\n",
       "      <td>20.0</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      date   low  high  avg\n",
       "0  2010101  12.0   NaN   14\n",
       "1  2010102  15.0  21.0   16\n",
       "2  2010103  13.5  20.0   19"
      ]
     },
     "execution_count": 318,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 替换缺失值. 使用含缺失值的那列平均值替换缺失值，inplace在原集合替换\n",
    "data['low'].fillna(data['low'].mean(), inplace=True)\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 337,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2010101</td>\n",
       "      <td>12.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      date   low  high  avg\n",
       "0  2010101  12.0   NaN   14"
      ]
     },
     "execution_count": 337,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[pd.notnull(data).all(axis=1)] # 只显示不为null的，如果某一行不是所有列都不是null则过滤掉\n",
    "data[pd.isnull(data).any(axis=1)] # 只显示为null的, 如果某行存在null值，则显示"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 346,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>c1</th>\n",
       "      <th>c2</th>\n",
       "      <th>c3</th>\n",
       "      <th>c4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2010101</td>\n",
       "      <td>12</td>\n",
       "      <td>3</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2010102</td>\n",
       "      <td>15</td>\n",
       "      <td>21</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "      <td>20</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        c1  c2  c3  c4\n",
       "0  2010101  12   3  14\n",
       "1  2010102  15  21  16\n",
       "2      NaN   3  20  19"
      ]
     },
     "execution_count": 346,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 替换标记数据，就是不是nan也不是空的，比如占位?\n",
    "data = pd.read_csv('demo_nil.csv', names=['c1','c2','c3','c4']) # 该csv文件没有表头，默认第一行会被当成表头，所以需要二外配置表头\n",
    "d3 = data.replace(to_replace=\"?\", value=np.nan) # 将其他值替换为nan，然后再对nan进行确实操作，比如删除、替换\n",
    "d3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 364,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>身高_(172.738, 177.738]</th>\n",
       "      <th>身高_(177.738, 179.138]</th>\n",
       "      <th>身高_(179.138, 180.079]</th>\n",
       "      <th>身高_(180.079, 181.278]</th>\n",
       "      <th>身高_(181.278, 183.606]</th>\n",
       "      <th>身高_(183.606, 188.379]</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   身高_(172.738, 177.738]  身高_(177.738, 179.138]  身高_(179.138, 180.079]  \\\n",
       "0                      1                      0                      0   \n",
       "1                      1                      0                      0   \n",
       "2                      1                      0                      0   \n",
       "3                      0                      0                      0   \n",
       "4                      0                      0                      0   \n",
       "\n",
       "   身高_(180.079, 181.278]  身高_(181.278, 183.606]  身高_(183.606, 188.379]  \n",
       "0                      0                      0                      0  \n",
       "1                      0                      0                      0  \n",
       "2                      0                      0                      0  \n",
       "3                      0                      0                      1  \n",
       "4                      0                      1                      0  "
      ]
     },
     "execution_count": 364,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 数据离散化 ， 对数据分组后，每种属性使用0-1代表是否属于该组\n",
    "height = np.random.normal(loc=180, scale=5, size=30) # 随机生成180为中轴正态分布的身高数据30个\n",
    "height\n",
    "sr = pd.Series(height) # 使用一维Series存储数据\n",
    "sr\n",
    "sr = pd.qcut(sr, 6) # 自动离散化，第一个参数是Series数据，第二个参数是分组数，会自动将数据分为6个区间\n",
    "sr # 含有区间的Series数据\n",
    "sr.value_counts() # 显示每个区间有几个数据，就是每个区间分别包含多少数据\n",
    "pd.get_dummies(sr, '身高').head() #  显示前5条，表格数据中的0位代表不属于该区间，1代表属于，这就是one-hot编码(也叫哑变量)形式数据离散化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 368,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>h_(170, 180]</th>\n",
       "      <th>h_(180, 190]</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   h_(170, 180]  h_(180, 190]\n",
       "0             1             0\n",
       "1             1             0\n",
       "2             1             0\n",
       "3             0             1\n",
       "4             0             1"
      ]
     },
     "execution_count": 368,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 手动分组\n",
    "sr = pd.Series(height) \n",
    "sr = pd.cut(sr, [170,180,190]) # 手动分组函数cut, 第二个参数是每组的边界, 这里分2组\n",
    "sr.value_counts()\n",
    "pd.get_dummies(sr, 'h').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 374,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2010101</td>\n",
       "      <td>12.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2010102</td>\n",
       "      <td>15.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010103</td>\n",
       "      <td>18.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2010101</td>\n",
       "      <td>12.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2010102</td>\n",
       "      <td>15.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010103</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20.0</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      date   low  high  avg\n",
       "0  2010101  12.0  23.0   14\n",
       "1  2010102  15.0  21.0   16\n",
       "2  2010103  18.0  20.0   19\n",
       "0  2010101  12.0   NaN   14\n",
       "1  2010102  15.0  21.0   16\n",
       "2  2010103   NaN  20.0   19"
      ]
     },
     "execution_count": 374,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 拼接pd.concat\n",
    "# 水平 数值拼接，与numpy的数组拼接用法相似\n",
    "d1 = pd.read_csv('demo.csv')\n",
    "d1\n",
    "d2 = pd.read_csv('demo_nan.csv')\n",
    "d2\n",
    "pd.concat([d1,d2], axis=1) # 水平拼接\n",
    "pd.concat([d1,d2]) # 竖直拼接， 如果列不相同，两个表相同字段拼接，不同的字段没有的填nan，列是两个表的列并集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 376,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>low_x</th>\n",
       "      <th>high</th>\n",
       "      <th>avg_x</th>\n",
       "      <th>low_y</th>\n",
       "      <th>avg_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2010101</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "      <td>14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2010102</td>\n",
       "      <td>15</td>\n",
       "      <td>21</td>\n",
       "      <td>16</td>\n",
       "      <td>15.0</td>\n",
       "      <td>16.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2010103</td>\n",
       "      <td>18</td>\n",
       "      <td>20</td>\n",
       "      <td>19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>19.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      date  low_x  high  avg_x  low_y  avg_y\n",
       "0  2010101     12    23     14    NaN    NaN\n",
       "1  2010102     15    21     16   15.0   16.0\n",
       "2  2010103     18    20     19    NaN   19.0"
      ]
     },
     "execution_count": 376,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 合并 pd.merge 支持内连接inner 左连接left 右连接right 外连接out\n",
    "# 连接和数据库的表连接相同， 将两个表连接\n",
    "pd.merge(d1, d2, how='inner', on=['date', 'high']) # 两个字段进行表连接， 两个表的两个字段相同的显示出来\n",
    "pd.merge(d1, d2, how='left', on=['date', 'high']) # 左连接"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 425,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>avg_date</th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>day</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "avg_date         0         1\n",
       "day                         \n",
       "1         0.666667  0.333333\n",
       "2         0.666667  0.333333\n",
       "3         0.000000  1.000000"
      ]
     },
     "execution_count": 425,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 交叉表 pd.crosstab() 用于计算某两列之间的关系, 就是两个变量间的关系\n",
    "# 例子：各个月份同一日平均温度>15和<15的比例\n",
    "d = pd.read_csv('demo_1.csv')\n",
    "dt = pd.to_datetime(d.date) # 将日期ndarray字符串列表转换为日期对象列表\n",
    "# 添加一个日字段，该字段只记录该日期是几日\n",
    "d['day'] = dt.apply(lambda x: x.day)\n",
    "d\n",
    "# 新增一列，用于记录温度大于15的为1，否则为0\n",
    "d['avg_date'] = np.where(d.avg >= 15, 1, 0)\n",
    "d\n",
    "crt = pd.crosstab(d.day, d.avg_date) # 交叉表 ，日与温度的关系，会产生每日对应的0和1的个数，就是每日大于15和小于15的个数\n",
    "crt.sum(axis=1) # 每日当前行的个数总和\n",
    "# 计算0-1所占百分比，就是>15的和<15的百分比\n",
    "crt.div(crt.sum(axis=1), axis=0) # 计算每行的和，然后用每行的数除和, 得到0和1的每日的比例\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 429,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>avg_date</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>day</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     avg_date\n",
       "day          \n",
       "1    0.333333\n",
       "2    0.333333\n",
       "3    1.000000"
      ]
     },
     "execution_count": 429,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 透视表 pd.pivot_table() 也是用于计算两个变量之间的关系\n",
    "d\n",
    "# help(d.pivot_table)\n",
    "d.pivot_table(['avg_date'], index=['day']) # 透视表直接计算出avg_date列1在每日所占的比例"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 433,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0xe75e6d0>"
      ]
     },
     "execution_count": 433,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEDCAYAAAAlRP8qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAS4klEQVR4nO3de7TVZZ3H8fc30AGT8IaOekBwyZhognqwbPLWRZAxmKyZwJaKVOQKRqdWq2hqTbZqGrtNk5dkSMnJVCw1Lw1K1AzTxRwBQ7wwIAHJgZrMirwECH7nj7PV43Efzj6cvc+Wh/drLdY6v+d59u/5bn9rffyd5/wukZlIknZ9r2p2AZKk+jDQJakQBrokFcJAl6RCGOiSVAgDXZIK0b9ZEx9wwAE5fPjwZk0vSbukpUuX/jYzh1Tra1qgDx8+nCVLljRreknaJUXEL7vqc8lFkgphoEtSIQx0SSpE09bQJalenn32Wdra2ti8eXOzS6mbAQMG0NLSwh577FHzZwx0Sbu8trY2Bg0axPDhw4mIZpfTa5nJE088QVtbGyNGjKj5c90uuUTE3Ij4TUQ81EV/RMRlEbE6IpZHxPE9qFuSem3z5s3sv//+RYQ5QESw//779/g3jlrW0K8Fxu+g/0xgZOXfdOCqHlUgSXVQSpg/b2e+T7eBnpk/An63gyGTgG9mu3uBfSLi4B5XIknqlXqsoR8KrO+w3VZp+1XngRExnfazeIYNG1aHqXvgksF9O19fu2RTsytoLI/frqsvjt24b8PG5vxBdN3WfTnrrLN46KGqq9LtY9at45577uGcc85paC31uGyx2u8FVV+DlJlzMrM1M1uHDKl656okFWfdunXccMMNDZ+nHmfobcDQDtstwMY67FeSeuWvp32Y9Rt/zeYtW7n4vVPYvv051q7fwBc++fcAXHvTHSx9cAWXf/ZjfOYrX+f6797F0EMO4oD99uGEY4/iIxeeV3W/S5c/wrQPf5q9Bg7gTW8e90L7unXrOPfcc3n66acBuOKKK3jjG9/IrFmzWLFiBWPGjOH888/noosuYtasWSxatIgtW7YwY8YMPvCBD/T6+9bjDP0O4LzK1S5vADZl5suWWySpr8398qdYevcNLJn/LS6bO4+zJ7yZW+/6zxf6b7rz+7x74hkseeARbpn/Q36+4AZuvfpLLHngkR3u94IPX8Jln/koP7vz31/SfuCBB7Jw4ULuv/9+brrpJi666CIALr30Uk4++WSWLVvGhz70Ia655hoGDx7M4sWLWbx4MV//+tdZu3Ztr79vt2foEXEjcBpwQES0AZ8C9gDIzNnAfGACsBp4Brig11VJUh1cNvdGvnvXfwGwfuP/sfaxDRw+rIV7ly5n5IhhrPzFL/nLsWP46tU3MGncqQwcOACAt7/tlC73uemPT/KHTU9x6kknAHDuuedy1113Ae03OM2cOZNly5bRr18/Vq1aVXUf3//+91m+fDk333xz+z43beLRRx/t0TXn1XQb6Jk5pZv+BGb0qgpJqrNF9yzhBz++j5/deS17DRzIae96P5u3bOXdE8/g23cu5LVHDOcd408nImiPsdpkQldXFH7lK1/hoIMO4oEHHuC5555jwIABXewjufzyyxk3blzV/p3ls1wkFWnTk0+x7+BB7DVwIP+7ei333v8gAGef+WZuW7CIG29bwLsnngHAm048jjsX/pjNm7fw1NPP8B8//EmX+91n8CAGv2ZvfnLfzwG4/vrrX5xz0yYOPvhgXvWqV3Hdddexfft2AAYNGsSTTz75wrhx48Zx1VVX8eyzzwKwatWqF9bde8Nb/yUVafxpb2T2dTdz7Fv/liMPH84bjn8dAPvu8xpGjRzBI4+u5cTjjgFg7JijmXjGKYx+22QOazmY1tGjGDxo7y73/Y1/ueSFP4qOe/vZL7R/8IMf5J3vfCff+c53OP3003n1q18NwLHHHkv/
/v0ZPXo0U6dO5eKLL2bdunUcf/zxZCZDhgzhtttu6/V3jp78qlFPra2t2acvuPA65l2bx2/X1QfHbsW4b3PUYQf2ah9PPf0Me796L57505845ez3MecLn+T41x3V/QcPOa5X8+7IihUrOOqol9YQEUszs7XaeM/QJQmY/tHP8siqNWzespXz/+as2sL8FcZAlyTghis/97K2Gf/wz/x08QMvabv4fVO44N2T+qqsHjHQJakLV37u480uoUe8ykWSCmGgS1IhDHRJKoSBLkkNdPfdd3PkkUdyxBFHcOmllzZ0Lv8oKmm3Mfyy+j4Idt1Fh+ywf/v27cyYMYOFCxfS0tLC2LFjmThxIqNGjaprHc/zDF2SGuS+++7jiCOO4PDDD2fPPfdk8uTJ3H777Q2bz0CXpAbZsGEDQ4e++LqIlpYWNmzY0LD5DHRJapBqj1Zp5MusDXRJapCWlhbWr3/xlcttbW0ccsiO1917w0CXpAYZO3Ysjz76KGvXrmXr1q3MmzePiRMnNmw+r3KRpAbp378/V1xxBePGjWP79u1MmzaNo48+unHzNWzPkvQK091lho0wYcIEJkyY0CdzueQiSYUw0CWpEAa6JBXCQJekQhjoklQIA12SCmGgS1IDTZs2jQMPPJBjjjmm4XN5Hbqk3cec0+q7v+mLuh0ydepUZs6cyXnnnVffuavwDF2SGuiUU05hv/3265O5DHRJKoSBLkmFMNAlqRAGuiQVwkCXpAaaMmUKJ510EitXrqSlpYVrrrmmYXPVdNliRIwHvgr0A67OzEs79Q8GvgUMq+zzS5n5jTrXKkm9M31Rn09544039tlc3Z6hR0Q/4ErgTGAUMCUiRnUaNgN4JDNHA6cBX46IPetcqyRpB2pZcjkRWJ2ZazJzKzAPmNRpTAKDov3tp3sDvwO21bVSSdIO1RLohwLrO2y3Vdo6ugI4CtgIPAhcnJnP1aVCSVJNagn0qNKWnbbHAcuAQ4AxwBUR8ZqX7ShiekQsiYgljz/+eI+LlaTqkszOsbRr25nvU0ugtwFDO2y30H4m3tEFwK3ZbjWwFnhtlQLnZGZrZrYOGTKkx8VKUjUDNq3hiae3FRPqmckTTzzBgAEDevS5Wq5yWQyMjIgRwAZgMnBOpzGPAW8BfhwRBwFHAmt6VIkk7aSW+z9PGx/j8cGHU31RoYE2rWjIbgcMGEBLS0uPPtNtoGfmtoiYCSyg/bLFuZn5cERcWOmfDXwGuDYiHqT9v+bHMvO3Pf0CkrQz9tj6B0bc+/HmTH7JpubMW0VN16Fn5nxgfqe22R1+3gicUd/SJEk94Z2iklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIQx0SSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYUw0CWpEP2bXUBfGb75hmaX0FDrml1Ag3n8pO55hi5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYWoKdAjYnxErIyI1RExq4sxp0XEsoh4OCL+u75lSpK60+2t/xHRD7gSeBvQBiyOiDsy85EOY/YBvgaMz8zHIuLARhUsSaquljP0E4HVmbkmM7cC84BJncacA9yamY8BZOZv6lumJKk7tQT6ocD6DtttlbaO/gLYNyIWRcTSiDivXgVKkmpTy9MWo0pbVtnPCcBbgIHAzyLi3sxc9ZIdRUwHpgMMGzas59VKkrpUyxl6GzC0w3YLsLHKmLsz8+nM/C3wI2B05x1l5pzMbM3M1iFDhuxszZKkKmoJ9MXAyIgYERF7ApOBOzqNuR04OSL6R8RewOuBFfUtVZK0I90uuWTmtoiYCSwA+gFzM/PhiLiw0j87M1dExN3AcuA54OrMfKiRhUuSXqqmNxZl5nxgfqe22Z22vwh8sX6lSZJ6wjtFJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEM
dEkqhIEuSYUw0CWpEAa6JBXCQJekQhjoklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIQx0SSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYUw0CWpEDUFekSMj4iVEbE6ImbtYNzYiNgeEe+qX4mSpFp0G+gR0Q+4EjgTGAVMiYhRXYz7PLCg3kVKkrpXyxn6icDqzFyTmVuBecCkKuP+DrgF+E0d65Mk1aiWQD8UWN9hu63S9oKIOBR4BzC7fqVJknqilkCPKm3ZaftfgY9l5vYd7ihiekQsiYgljz/+eK01SpJq0L+GMW3A0A7bLcDGTmNagXkRAXAAMCEitmXmbR0HZeYcYA5Aa2tr5/8pSJJ6oZZAXwyMjIgRwAZgMnBOxwGZOeL5nyPiWuB7ncNcktRY3QZ6Zm6LiJm0X73SD5ibmQ9HxIWVftfNJekVoJYzdDJzPjC/U1vVIM/Mqb0vS5LUU94pKkmFMNAlqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIQx0SSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYUw0CWpEAa6JBXCQJekQhjoklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIQx0SSpETYEeEeMjYmVErI6IWVX63xMRyyv/7omI0fUvVZK0I90GekT0A64EzgRGAVMiYlSnYWuBUzPzWOAzwJx6FypJ2rFaztBPBFZn5prM3ArMAyZ1HJCZ92Tm7yub9wIt9S1TktSdWgL9UGB9h+22SltX3gvc1ZuiJEk917+GMVGlLasOjDid9kB/Uxf904HpAMOGDauxRElSLWo5Q28DhnbYbgE2dh4UEccCVwOTMvOJajvKzDmZ2ZqZrUOGDNmZeiVJXagl0BcDIyNiRETsCUwG7ug4ICKGAbcC52bmqvqXKUnqTrdLLpm5LSJmAguAfsDczHw4Ii6s9M8G/hHYH/haRABsy8zWxpUtSeqsljV0MnM+ML9T2+wOP78PeF99S5Mk9YR3ikpSIQx0SSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiEMdEkqhIEuSYUw0CWpEAa6JBXCQJekQhjoklQIA12SCmGgS1IhDHRJKoSBLkmFMNAlqRAGuiQVwkCXpEIY6JJUCANdkgphoEtSIQx0SSqEgS5JhTDQJakQBrokFcJAl6RCGOiSVAgDXZIKYaBLUiFqCvSIGB8RKyNidUTMqtIfEXFZpX95RBxf/1IlSTvSbaBHRD/gSuBMYBQwJSJGdRp2JjCy8m86cFWd65QkdaOWM/QTgdWZuSYztwLzgEmdxkwCvpnt7gX2iYiD61yrJGkH+tcw5lBgfYftNuD1NYw5FPhVx0ERMZ32M3iApyJiZY+q3bUcAPy2ryaLz/fVTLsNj9+uq0+PHZ+OPpuq4rCuOmoJ9GrV5k6MITPnAHNqmHOXFxFLMrO12XVo53j8dl2787GrZcmlDRjaYbsF2LgTYyRJDVRLoC8GRkbEiIjYE5gM3NFpzB3AeZWrXd4AbMrMX3XekSSpcbpdcsnMbRExE1gA9APmZubDEXFhpX82MB+YAKwGngEuaFzJu4zdYmmpYB6/Xddue+wi82VL3ZKkXZB3ikpSIQx0SSqEgS5JhTDQtduLiNdGxFsiYu9O7eObVZNqFxEnRsTYys+jIuLDETGh2XU1g38UbbCIuCAzv9HsOlRdRFwEzABWAGOAizPz9krf/Znpg+ZewSLiU7Q/S6o/sJD2u9gXAW8FFmTmPzWvur5noDdYRDyWmcOaXYeqi4gHgZMy86mIGA7cDFyXmV+NiJ9n5nFNLVA7VDl+Y4A/A34NtGTmHyNiIPA/mXlsUwvsY7Xc+q9uRMTyrrqAg/qyFvVYv8x8CiAz10XEacDN
EXEY1R9poVeWbZm5HXgmIn6RmX8EyMw/RcRzTa6tzxno9XEQMA74faf2AO7p+3LUA7+OiDGZuQygcqZ+FjAXeF1zS1MNtkbEXpn5DHDC840RMRgw0LVTvgfs/XwodBQRi/q+HPXAecC2jg2ZuY32R1n8W3NKUg+ckplbADKzY4DvAZzfnJKaxzV0SSqEly1KUiEMdEkqhIGu3V5EXBIRH2l2HVJvGeiSVAgDXbuliPhERKyMiB8AR1ba3h8RiyPigYi4JSL2iohBEbE2IvaojHlNRKx7flt6JTHQtduJiBNof/PWccDZwNhK162ZOTYzR9P+KID3ZuaTtN9K/leVMZOBWzLz2b6tWuqega7d0cnAdzPzmcqdhc+/UvGYiPhx5Xby9wBHV9qv5sW3cF0A+GwevSIZ6NpdVbsB41pgZma+Dvg0MAAgM38KDI+IU2l/VMBDfVal1AMGunZHPwLeEREDI2IQ8PZK+yDgV5X18fd0+sw3gRvx7FyvYN4pqt1SRHyC9tv+fwm0AY8ATwMfrbQ9CAzKzKmV8X8OrAUOzsw/NKNmqTsGulSDiHgXMCkzz212LVJXfDiX1I2IuJz2lyjslm/B0a7DM3RJKoR/FJWkQhjoklQIA12SCmGgS1IhDHRJKoSBLkmF+H9kbPKTf3Pe9wAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 直接绘图\n",
    "crt.div(crt.sum(axis=1), axis=0).plot(kind='bar', stacked=True) # 堆叠的柱状图"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 453,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0E59A940>"
      ]
     },
     "execution_count": 453,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# DataFrame Series的分组于聚合, 进行groupby后得到一个DataFrameGroupBy对象，要想展示出来，还需要进行聚合函数操作\n",
    "d.groupby(by=\"day\").avg.max() # 根据day字段分组，并且对avg字段取最大值\n",
    "\n",
    "# Series分组\n",
    "d.avg.groupby(by=d.day).count() # 根据day分组，并聚合avg字段的个数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 452,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>low</th>\n",
       "      <th>high</th>\n",
       "      <th>avg</th>\n",
       "      <th>avg_date</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>day</th>\n",
       "      <th>date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">1</th>\n",
       "      <th>2010-01-01</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010-02-01</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010-03-01</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">2</th>\n",
       "      <th>2010-01-02</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010-02-02</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010-03-02</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">3</th>\n",
       "      <th>2010-01-03</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010-02-03</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010-03-03</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                low  high  avg  avg_date\n",
       "day date                                \n",
       "1   2010-01-01    1     1    1         1\n",
       "    2010-02-01    1     1    1         1\n",
       "    2010-03-01    1     1    1         1\n",
       "2   2010-01-02    1     1    1         1\n",
       "    2010-02-02    1     1    1         1\n",
       "    2010-03-02    1     1    1         1\n",
       "3   2010-01-03    1     1    1         1\n",
       "    2010-02-03    1     1    1         1\n",
       "    2010-03-03    1     1    1         1"
      ]
     },
     "execution_count": 452,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gb = d.groupby(by=['day','date']).count() # 根据day分组，然后再根据date分组, count()聚合 返回的是MultiIndex的DataFrame\n",
    "type(gb)\n",
    "gb"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
